<!DOCTYPE html>
<html lang="en-us">
  <head>

    <meta charset="utf-8">
<title>Algorithmic Stemmers | Elasticsearch: The Definitive Guide [2.x] | Elastic</title>
<link rel="home" href="index.html" title="Elasticsearch: The Definitive Guide [2.x]">
<link rel="up" href="stemming.html" title="Reducing Words to Their Root Form">
<link rel="prev" href="stemming.html" title="Reducing Words to Their Root Form">
<link rel="next" href="dictionary-stemmers.html" title="Dictionary Stemmers">
<meta name="DC.type" content="Learn/Docs/Legacy/Elasticsearch/Definitive Guide/2.x">
<meta name="DC.subject" content="Elasticsearch">
<meta name="DC.identifier" content="2.x">
<meta name="robots" content="noindex,nofollow">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.optimizely.com/js/18132920325.js"></script>
    <link rel="apple-touch-icon" sizes="57x57" href="/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/apple-icon-180x180.png">
    <link rel="icon" type="image/png" href="/favicon-32x32.png" sizes="32x32">
    <link rel="icon" type="image/png" href="/android-chrome-192x192.png" sizes="192x192">
    <link rel="icon" type="image/png" href="/favicon-96x96.png" sizes="96x96">
    <link rel="icon" type="image/png" href="/favicon-16x16.png" sizes="16x16">
    <link rel="manifest" href="/manifest.json">
    <meta name="apple-mobile-web-app-title" content="Elastic">
    <meta name="application-name" content="Elastic">
    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/mstile-144x144.png">
    <meta name="theme-color" content="#ffffff">
    <meta name="naver-site-verification" content="936882c1853b701b3cef3721758d80535413dbfd">
    <meta name="yandex-verification" content="d8a47e95d0972434">
    <meta name="localized" content="true">
    <meta name="st:robots" content="follow,index">
    <meta property="og:image" content="https://www.elastic.co/static/images/elastic-logo-200.png">
    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
    <link rel="icon" href="/favicon.ico" type="image/x-icon">
    <link rel="apple-touch-icon-precomposed" sizes="64x64" href="/favicon_64x64_16bit.png">
    <link rel="apple-touch-icon-precomposed" sizes="32x32" href="/favicon_32x32.png">
    <link rel="apple-touch-icon-precomposed" sizes="16x16" href="/favicon_16x16.png">
    <!-- Give IE8 a fighting chance -->
    <!--[if lt IE 9]>
    <script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
    <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
    <![endif]-->
    <link rel="stylesheet" type="text/css" href="/guide/static/styles.css">
  </head>

  <!--© 2015-2021 Elasticsearch B.V. Copying, publishing and/or distributing without written permission is strictly prohibited.-->

  <body>
    <!-- Google Tag Manager -->
    <!-- Create the data layer array before the loader below pushes to it. -->
    <script>dataLayer = [];</script>
    <!-- No-JavaScript fallback: hidden iframe. The title gives the frame an
         accessible name; the URL is explicit https rather than protocol-relative. -->
    <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-58RLH5" title="Google Tag Manager" height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <script>
      // GTM loader: pushes the gtm.start event onto the data layer, then inserts
      // an async gtm.js script element before the first script element on the page.
      (function(w,d,s,l,i){
        w[l]=w[l]||[];
        w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});
        // dl adds an "&l=" query parameter only when the data layer name is not the default.
        var f=d.getElementsByTagName(s)[0], j=d.createElement(s), dl=l!='dataLayer'?'&l='+l:'';
        j.async=true;
        // Explicit https: a protocol-relative URL would inherit http: or file: from the page.
        j.src='https://www.googletagmanager.com/gtm.js?id='+i+dl;
        f.parentNode.insertBefore(j,f);
      })(window,document,'script','dataLayer','GTM-58RLH5');
    </script>
    <!-- End Google Tag Manager -->

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=UA-12395217-16"></script>
    <script>
      // Reuse the existing dataLayer array if one is already defined; otherwise create it.
      window.dataLayer = window.dataLayer || [];
      // gtag() queues its raw `arguments` object (not a copied array) on dataLayer.
      function gtag(){dataLayer.push(arguments);}
      // Queue a 'js' entry carrying the current time, then a 'config' entry for ID UA-12395217-16.
      gtag('js', new Date());
      gtag('config', 'UA-12395217-16');
    </script>

    <!--BEGIN QUALTRICS WEBSITE FEEDBACK SNIPPET-->
    <script type="text/javascript">
      // Cookie-based sampling loader for the Qualtrics site-intercept script.
      // Constructor arguments, as used below: e is compared against 100 and divided by 100
      // (a percentage), h is a mode flag ("v" or "r"), f is the cookie name, and the inner g
      // (which shadows the constructor's own name) is the URL assigned to the injected script's src.
      (function(){var g=function(e,h,f,g){
      // get(a): returns the value of cookie `a` from document.cookie (leading spaces are
      // stripped from each entry), or null when no such cookie exists.
      this.get=function(a){for(var a=a+"=",c=document.cookie.split(";"),b=0,e=c.length;b<e;b++){for(var d=c[b];" "==d.charAt(0);)d=d.substring(1,d.length);if(0==d.indexOf(a))return d.substring(a.length,d.length)}return null};
      // set(a, c): writes cookie a=c with path=/ and an expiry 6048E5 ms (7 days) from now.
      this.set=function(a,c){var b="",b=new Date;b.setTime(b.getTime()+6048E5);b="; expires="+b.toGMTString();document.cookie=a+"="+c+b+"; path=/; "};
      // check(): decides whether the script should be loaded.
      //  - No cookie and e == 100: returns true without writing a cookie.
      //  - No cookie and e != 100: in mode "v", e is first replaced by 0 or 100 using Math.random();
      //    then "mode:percent:0" is stored in the cookie.
      //  - The cookie value is split on ":" into [mode, percent, counter]; percent 100 returns true.
      //  - Mode "v" returns false; mode "r" returns true when counter is a multiple of
      //    floor(100/percent), then increments the counter and rewrites the cookie.
      this.check=function(){var a=this.get(f);if(a)a=a.split(":");else if(100!=e)"v"==h&&(e=Math.random()>=e/100?0:100),a=[h,e,0],this.set(f,a.join(":"));else return!0;var c=a[1];if(100==c)return!0;switch(a[0]){case "v":return!1;case "r":return c=a[2]%Math.floor(100/c),a[2]++,this.set(f,a.join(":")),!c}return!0};
      // go(): when check() returns true, appends a script element with src g to document.body
      // (skipped if document.body is not available).
      this.go=function(){if(this.check()){var a=document.createElement("script");a.type="text/javascript";a.src=g;document.body&&document.body.appendChild(a)}};
      // start(): runs go() on the window load event, via addEventListener or, failing that, attachEvent.
      this.start=function(){var a=this;window.addEventListener?window.addEventListener("load",function(){a.go()},!1):window.attachEvent&&window.attachEvent("onload",function(){a.go()})}};
      // Instantiate with e=100 and mode "r"; the empty catch swallows any exception thrown here.
      try{(new g(100,"r","QSI_S_ZN_emkP0oSe9Qrn7kF","https://znemkp0ose9qrn7kf-elastic.siteintercept.qualtrics.com/WRSiteInterceptEngine/?Q_ZID=ZN_emkP0oSe9Qrn7kF")).start()}catch(i){}})();
    </script><div id="ZN_emkP0oSe9Qrn7kF"><!--DO NOT REMOVE-CONTENTS PLACED HERE--></div>
    <!--END WEBSITE FEEDBACK SNIPPET-->

    <div id="elastic-nav" style="display:none;"></div>
    <script src="https://www.elastic.co/elastic-nav.js"></script>

    <!-- Subnav -->
    <div>
      <div>
        <div class="tertiary-nav d-none d-md-block">
          <div class="container">
            <div class="p-t-b-15 d-flex justify-content-between nav-container">
              <div class="breadcrum-wrapper"><span><a href="/guide/" style="font-size: 14px; font-weight: 600; color: #000;">Docs</a></span></div>
            </div>
          </div>
        </div>
      </div>
    </div>

    <div class="main-container">
      <section id="content">
        <div class="content-wrapper">

          <section id="guide" lang="en">
            <div class="container">
              <div class="row">
                <div class="col-xs-12 col-sm-8 col-md-8 guide-section">
                  <!-- start body -->
                  <div class="page_header">
<p>
  <strong>WARNING</strong>: The 2.x versions of Elasticsearch have passed their
  <a href="https://www.elastic.co/support/eol">EOL dates</a>. If you are running
  a 2.x version, we strongly advise you to upgrade.
</p>
<p>
  This documentation is no longer maintained and may be removed. For the latest
  information, see the <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html">current
  Elasticsearch documentation</a>.
</p>
</div>
<div id="content">
<div class="breadcrumbs">
<span class="breadcrumb-link"><a href="index.html">Elasticsearch: The Definitive Guide [2.x]</a></span>
»
<span class="breadcrumb-link"><a href="languages.html">Dealing with Human Language</a></span>
»
<span class="breadcrumb-link"><a href="stemming.html">Reducing Words to Their Root Form</a></span>
»
<span class="breadcrumb-node">Algorithmic Stemmers</span>
</div>
<div class="navheader">
<span class="prev">
<a href="stemming.html">« Reducing Words to Their Root Form</a>
</span>
<span class="next">
<a href="dictionary-stemmers.html">Dictionary Stemmers »</a>
</span>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h2 class="title">
<a id="algorithmic-stemmers"></a>Algorithmic Stemmers<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/230_Stemming/10_Algorithmic_stemmers.asciidoc">edit</a>
</h2>
</div></div></div>
<p>Most of the stemmers available in Elasticsearch are algorithmic in that they
apply a series of rules to a word in order to reduce it to its root form, such
as stripping the final <code class="literal">s</code> or <code class="literal">es</code> from plurals.   They don’t have to know
anything about individual words in order to stem them.</p>
<p>These algorithmic stemmers have the advantage that they are available out of
the box, are fast, use little memory, and work well for regular words.  The
downside is that they don’t cope well with irregular words like <code class="literal">be</code>, <code class="literal">are</code>,
and <code class="literal">am</code>, or <code class="literal">mice</code> and <code class="literal">mouse</code>.</p>
<p>One of the earliest stemming algorithms is the Porter stemmer for English,
which is still the recommended English stemmer today.  Martin Porter
subsequently went on to create the
<a href="https://snowballstem.org/" class="ulink" target="_top">Snowball language</a> for creating stemming
algorithms, and a number of the stemmers available in Elasticsearch are
written in Snowball.</p>
<div class="tip admon">
<div class="icon"></div>
<div class="admon_content">
<p>The <a href="/guide/en/elasticsearch/reference/2.4/analysis-kstem-tokenfilter.html" class="ulink" target="_top"><code class="literal">kstem</code> token filter</a> is a stemmer
for English which combines the algorithmic approach with a built-in
dictionary. The dictionary contains a list of root words and exceptions in
order to avoid conflating words incorrectly. <code class="literal">kstem</code> tends to stem less
aggressively than the Porter stemmer.</p>
</div>
</div>
<div class="section">
<div class="titlepage"><div><div>
<h3 class="title">
<a id="_using_an_algorithmic_stemmer"></a>Using an Algorithmic Stemmer<a class="edit_me edit_me_private" rel="nofollow" title="Editing on GitHub is available to Elastic" href="https://github.com/elastic/elasticsearch-definitive-guide/edit/2.x/230_Stemming/10_Algorithmic_stemmers.asciidoc">edit</a>
</h3>
</div></div></div>
<p>While you can use the
<a href="/guide/en/elasticsearch/reference/2.4/analysis-porterstem-tokenfilter.html" class="ulink" target="_top"><code class="literal">porter_stem</code></a> or
<a href="/guide/en/elasticsearch/reference/2.4/analysis-kstem-tokenfilter.html" class="ulink" target="_top"><code class="literal">kstem</code></a> token filter directly, or
create a language-specific Snowball stemmer with the
<a href="/guide/en/elasticsearch/reference/2.4/analysis-snowball-tokenfilter.html" class="ulink" target="_top"><code class="literal">snowball</code></a> token filter, all of the
algorithmic stemmers are exposed via a single unified interface:
the <a href="/guide/en/elasticsearch/reference/2.4/analysis-stemmer-tokenfilter.html" class="ulink" target="_top"><code class="literal">stemmer</code> token filter</a>, which
accepts the <code class="literal">language</code> parameter.</p>
<p>For instance, perhaps you find the default stemmer used by the <code class="literal">english</code>
analyzer to be too aggressive and you want to make it less aggressive.
The first step is to look up the configuration for the <code class="literal">english</code> analyzer
in the <a href="/guide/en/elasticsearch/reference/2.4/analysis-lang-analyzer.html" class="ulink" target="_top">language analyzers</a>
documentation, which shows the following:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">{
  "settings": {
    "analysis": {
      "filter": {
        "english_stop": {
          "type":       "stop",
          "stopwords":  "_english_"
        },
        "english_keywords": {
          "type":       "keyword_marker", <a id="CO154-1"></a><i class="conum" data-value="1"></i>
          "keywords":   []
        },
        "english_stemmer": {
          "type":       "stemmer",
          "language":   "english" <a id="CO154-2"></a><i class="conum" data-value="2"></i>
        },
        "english_possessive_stemmer": {
          "type":       "stemmer",
          "language":   "possessive_english" <a id="CO154-3"></a><i class="conum" data-value="2"></i>
        }
      },
      "analyzer": {
        "english": {
          "tokenizer":  "standard",
          "filter": [
            "english_possessive_stemmer",
            "lowercase",
            "english_stop",
            "english_keywords",
            "english_stemmer"
          ]
        }
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO154-1"><i class="conum" data-value="1"></i></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">keyword_marker</code> token filter lists words that should not be
stemmed.  This defaults to the empty list.</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO154-2"><i class="conum" data-value="2"></i></a><a href="#CO154-3"></a></p>
</td>
<td align="left" valign="top">
<p>The <code class="literal">english</code> analyzer uses two stemmers: the <code class="literal">possessive_english</code>
and the <code class="literal">english</code> stemmer. The possessive stemmer removes <code class="literal">'s</code>
from any words before passing them on to the <code class="literal">english_stop</code>,
<code class="literal">english_keywords</code>, and <code class="literal">english_stemmer</code>.</p>
</td>
</tr>
</table>
</div>
<p>Having reviewed the current configuration, we can use it as the basis for
a new analyzer, with the following changes:</p>
<div class="ulist itemizedlist">
<ul class="itemizedlist">
<li class="listitem">
Change the <code class="literal">english_stemmer</code> from <code class="literal">english</code> (which maps to the
<a href="/guide/en/elasticsearch/reference/2.4/analysis-porterstem-tokenfilter.html" class="ulink" target="_top"><code class="literal">porter_stem</code></a> token filter)
to <code class="literal">light_english</code> (which maps to the less aggressive
<a href="/guide/en/elasticsearch/reference/2.4/analysis-kstem-tokenfilter.html" class="ulink" target="_top"><code class="literal">kstem</code></a> token filter).
</li>
<li class="listitem">
Add the <a class="xref" href="asciifolding-token-filter.html" title="You Have an Accent"><code class="literal">asciifolding</code></a> token filter to
remove any diacritics from foreign words.
</li>
<li class="listitem">
Remove the <code class="literal">keyword_marker</code> token filter, as we don’t need it.
(We discuss this in more detail in <a class="xref" href="controlling-stemming.html" title="Controlling Stemming">Controlling Stemming</a>.)
</li>
</ul>
</div>
<p>Our new custom analyzer would look like this:</p>
<div class="pre_wrapper lang-js">
<pre class="programlisting prettyprint lang-js">PUT /my_index
{
  "settings": {
    "analysis": {
      "filter": {
        "english_stop": {
          "type":       "stop",
          "stopwords":  "_english_"
        },
        "light_english_stemmer": {
          "type":       "stemmer",
          "language":   "light_english" <a id="CO155-1"></a><i class="conum" data-value="1"></i>
        },
        "english_possessive_stemmer": {
          "type":       "stemmer",
          "language":   "possessive_english"
        }
      },
      "analyzer": {
        "english": {
          "tokenizer":  "standard",
          "filter": [
            "english_possessive_stemmer",
            "lowercase",
            "english_stop",
            "light_english_stemmer", <a id="CO155-2"></a><i class="conum" data-value="1"></i>
            "asciifolding" <a id="CO155-3"></a><i class="conum" data-value="2"></i>
          ]
        }
      }
    }
  }
}</pre>
</div>
<div class="calloutlist">
<table border="0" summary="Callout list">
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO155-1"><i class="conum" data-value="1"></i></a><a href="#CO155-2"></a></p>
</td>
<td align="left" valign="top">
<p>Replaced the <code class="literal">english</code> stemmer with the less aggressive
<code class="literal">light_english</code> stemmer</p>
</td>
</tr>
<tr>
<td align="left" valign="top" width="5%">
<p><a href="#CO155-3"><i class="conum" data-value="2"></i></a></p>
</td>
<td align="left" valign="top">
<p>Added the <code class="literal">asciifolding</code> token filter</p>
</td>
</tr>
</table>
</div>
</div>

</div>
<div class="navfooter">
<span class="prev">
<a href="stemming.html">« Reducing Words to Their Root Form</a>
</span>
<span class="next">
<a href="dictionary-stemmers.html">Dictionary Stemmers »</a>
</span>
</div>
</div>

                  <!-- end body -->
                </div>
                <div class="col-xs-12 col-sm-4 col-md-4" id="right_col">
                  <div id="rtpcontainer" style="display: block;">
                    <div class="mktg-promo">
                      <h3>Most Popular</h3>
                      <ul class="icons">
                        <li class="icon-elasticsearch-white"><a href="https://www.elastic.co/webinars/getting-started-elasticsearch?baymax=default&amp;elektra=docs&amp;storm=top-video">Get Started with Elasticsearch: Video</a></li>
                        <li class="icon-kibana-white"><a href="https://www.elastic.co/webinars/getting-started-kibana?baymax=default&amp;elektra=docs&amp;storm=top-video">Intro to Kibana: Video</a></li>
                        <li class="icon-logstash-white"><a href="https://www.elastic.co/webinars/introduction-elk-stack?baymax=default&amp;elektra=docs&amp;storm=top-video">ELK for Logs &amp; Metrics: Video</a></li>
                      </ul>
                    </div>
                  </div>
                </div>
              </div>
            </div>
          </section>

        </div>


<div id="elastic-footer"></div>
<script src="https://www.elastic.co/elastic-footer.js"></script>
<!-- Footer Section end-->

      </section>
    </div>

<!-- Page scripts: jquery.js is loaded before docs.js; neither is async or deferred, so they run in this order. -->
<script src="/guide/static/jquery.js"></script>
<script type="text/javascript" src="/guide/static/docs.js"></script>
<script type="text/javascript">
  // Set window.initial_state to an empty object.
  // NOTE(review): presumably consumed by docs.js — confirm against that script.
  window.initial_state = {}</script>
  </body>
</html>
